2 // GetMetadataForHTMLLog-Additions.m
3 // AdiumSpotlightImporter
5 // Created by Evan Schoenberg on 5/25/06.
8 #import "GetMetadataForHTMLLog-Additions.h"
10 //From LMX. Included under the BSD license. http://trac.adiumx.com/wiki/LMXParser
11 static BOOL getSurrogatesForUnicodeScalarValue(const UTF32Char scalar, unichar *outHigh, unichar *outLow);
/*!
 * @brief These additions are all from AIUtilities
 *
 * The spotlight importer should include this file to get these specific additions.
 * If the GetMetadataForHTMLLog class is used in a situation in which AIUtilities is linked in already, it is
 * not necessary to include this implementation file.
 */
@implementation NSScanner (AdiumSpotlightImporterAdditions)

/*!
 * @brief Scan an unsigned decimal integer starting at the current scan location
 *
 * Characters in the receiver's charactersToBeSkipped set are consumed first. A single leading '+' is
 * accepted. On success the scan location is left immediately after the last digit consumed.
 * On failure the scan location stays where it was after the skipped characters.
 * A value too large for an unsigned int is clamped to UINT_MAX; all of its digits are still consumed.
 *
 * @param unsignedIntValue On success, receives the scanned value. Pass NULL to just scan past the number.
 * @result YES if at least one digit was scanned; NO otherwise.
 */
- (BOOL)scanUnsignedInt:(unsigned int *)unsignedIntValue
{
	//skip characters if necessary
	NSCharacterSet *skipSet = [self charactersToBeSkipped];
	if (skipSet) {
		//Clear the skip set while scanning it, so that the scanner actually consumes those characters
		[self setCharactersToBeSkipped:nil];
		[self scanCharactersFromSet:skipSet intoString:NULL];
		[self setCharactersToBeSkipped:skipSet];
	}

	/* Read characters straight from the string using absolute indexes. This avoids copying the remainder
	 * of the string into a temporary buffer (which had to be freed on every exit path), and it means the
	 * index handed to -setScanLocation: below is an index into the string rather than into a copy.
	 */
	NSString *string = [self string];
	unsigned length = [string length];
	unsigned i = [self scanLocation];

	//An optional '+' may precede the digits
	if ((i < length) && ([string characterAtIndex:i] == '+')) {
		++i;
	}

	//There must be at least one digit
	if (i >= length) return NO;
	unichar c = [string characterAtIndex:i];
	if ((c < '0') || (c > '9')) return NO;

	unsigned int total = 0;
	BOOL overflowed = NO;
	while (i < length) {
		c = [string characterAtIndex:i];
		if ((c < '0') || (c > '9')) break;

		unsigned int digit = c - '0';
		if (overflowed || (total > ((UINT_MAX - digit) / 10))) {
			//total * 10 + digit would not fit; keep consuming digits but stop accumulating
			overflowed = YES;
		} else {
			total = (total * 10) + digit;
		}
		++i;
	}

	[self setScanLocation:i];
	if (unsignedIntValue) *unsignedIntValue = (overflowed ? UINT_MAX : total);

	return YES;
}

@end
@implementation NSString (AdiumSpotlightImporterAdditions)

/*!
 * @brief Split a Unicode code point into UTF-16 code units
 *
 * @param in The UTF-32 value to convert.
 * @param outHigh If non-NULL, receives the high (leading) surrogate, or 0 when no surrogate pair is needed.
 * @param outLow If non-NULL, receives the low (trailing) surrogate, or the single UTF-16 code unit when no pair is needed.
 * @result YES if \a in requires a surrogate pair; NO if it fits in one code unit.
 *         Values above U+10FFFF cannot be encoded in UTF-16; for those, NO is returned and
 *         outLow receives U+FFFD (REPLACEMENT CHARACTER).
 */
BOOL AIGetSurrogates(UTF32Char in, UTF16Char *outHigh, UTF16Char *outLow)
{
	if (in < 0x10000) {
		if (outHigh) *outHigh = 0;
		if (outLow)  *outLow  = in;
		return NO;
	}

	if (in > 0x10FFFF) {
		if (outHigh) *outHigh = 0;
		if (outLow)  *outLow  = 0xFFFD;
		return NO;
	}

	enum {
		UTF32SupplementaryBase = 65536,         //0x10000; subtracted before the value is split
		UTF32ShiftToUTF16High = 10,
		UTF32LowMask = 1023,                    //0b0000 0011 1111 1111
		UTF16HighAdditiveMask = 55296,          //0b1101 1000 0000 0000
		UTF16LowAdditiveMask = 56320            //0b1101 1100 0000 0000
	};

	//The pair encodes (in - 0x10000): its top ten bits go in the high surrogate, its bottom ten in the low one
	UTF32Char offset = in - UTF32SupplementaryBase;

	if (outHigh) {
		*outHigh = (UTF16Char)((offset >> UTF32ShiftToUTF16High) | UTF16HighAdditiveMask);
	}

	if (outLow) {
		*outLow = (UTF16Char)((offset & UTF32LowMask) | UTF16LowAdditiveMask);
	}

	return YES;
}

/*!
 * @brief Read a string from a file, assuming it to be UTF8
 *
 * If it can not be read as UTF8, it will be read as ASCII.
 *
 * NOTE(review): NSASCIIStringEncoding is a 7-bit encoding, so a file rejected as UTF-8 because of
 * high bytes will probably be rejected here as well. Confirm whether an 8-bit fallback is wanted.
 *
 * @param path Path of the file to read. nil yields nil.
 * @result The file's contents, or nil if the file is missing or could not be decoded.
 */
+ (NSString *)stringWithContentsOfUTF8File:(NSString *)path
{
	if (!path) return nil;

	NSString	*string = nil;

	if ((floor(kCFCoreFoundationVersionNumber) > kCFCoreFoundationVersionNumber10_3)) {
		NSError	*error = nil;

		string = [NSString stringWithContentsOfFile:path
										   encoding:NSUTF8StringEncoding
											  error:&error];

		//Decide on the return value: the error pointer is only meaningful when the read failed
		if (!string) {
			BOOL	handled = NO;

			if ([[error domain] isEqualToString:NSCocoaErrorDomain]) {
				if ([error code] == NSFileReadNoSuchFileError) { //260
					//File not found. Nothing to report.
					handled = YES;

				} else if ([error code] == NSFileReadInapplicableStringEncodingError) { //261
					/* Reason: File could not be opened using text encoding Unicode (UTF-8).
					 * Description: Text encoding Unicode (UTF-8) is not applicable.
					 *
					 * We couldn't read the file as UTF8. Try again as ASCII.
					 */
					NSError	*newError = nil;

					string = [NSString stringWithContentsOfFile:path
													   encoding:NSASCIIStringEncoding
														  error:&newError];

					//If we got a string back, we recovered reasonably successfully...
					if (string) {
						handled = YES;
					}
				}
			}

			if (!handled) {
				NSLog(@"Error reading %@:\n%@; %@.",path,
					  [error localizedDescription], [error localizedFailureReason]);
			}
		}

	} else {
		NSData	*data = [NSData dataWithContentsOfFile:path];

		//No data means the file could not be read at all; string stays nil
		if (data) {
			string = [[[NSString alloc] initWithData:data
											encoding:NSUTF8StringEncoding] autorelease];
			if (!string) {
				string = [[[NSString alloc] initWithData:data
												encoding:NSASCIIStringEncoding] autorelease];
			}

			if (!string) {
				NSLog(@"Error reading %@",path);
			}
		}
	}

	return string;
}

//stringByUnescapingFromXMLWithEntities: was written by Peter Hosey and is explicitly released under the BSD license.
/*
 Copyright © 2006 Peter Hosey

 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 Neither the name of Peter Hosey nor the names of his contributors may be used to endorse or promote products derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*!
 * @brief Append one Unicode code point to a mutable string as UTF-16
 *
 * Values above U+10FFFF are not code points; U+FFFD (REPLACEMENT CHARACTER) is appended in their place.
 */
static void AIAppendUnicodeScalarValue(NSMutableString *result, UTF32Char scalar)
{
	if (scalar > 0x10FFFF) scalar = 0xFFFD;

	unichar high = 0, low = 0;
	if (getSurrogatesForUnicodeScalarValue(scalar, &high, &low)) {
		[result appendFormat:@"%C%C", high, low];
	} else {
		[result appendFormat:@"%C", low];
	}
}

/*!
 * @brief Replace XML character and entity references with the characters they stand for
 *
 * Handles decimal (&#38;) and hexadecimal (&#x26;) character references and named entities.
 * Text that merely looks like the start of a reference (a bare '&', a number with no ';',
 * an unknown entity name) is copied to the result unchanged rather than being dropped.
 *
 * @param entities Dictionary mapping entity names (without '&' and ';') to replacement strings.
 *                 If nil, the five predefined XML entities plus nbsp are used.
 * @result A new string with the references replaced.
 */
- (NSString *)stringByUnescapingFromXMLWithEntities:(NSDictionary *)entities
{
	if (floor(NSAppKitVersionNumber) > NSAppKitVersionNumber10_3) {
		return [(NSString *)CFXMLCreateStringByUnescapingEntities(kCFAllocatorDefault, (CFStringRef)self, (CFDictionaryRef)entities) autorelease];
	}

	if (!entities) {
		static const unichar nbsp = 0xa0;
		entities = [NSDictionary dictionaryWithObjectsAndKeys:
			@"&",  @"amp",
			@"<",  @"lt",
			@">",  @"gt",
			@"\"", @"quot",
			@"'",  @"apos",
			[NSString stringWithCharacters:&nbsp length:1], @"nbsp",
			nil];
	}

	unsigned len = [self length];
	NSMutableString *result = [NSMutableString stringWithCapacity:len];
	NSScanner *scanner = [NSScanner scannerWithString:self];
	//Skip nothing: whitespace is content here
	[scanner setCharactersToBeSkipped:[NSCharacterSet characterSetWithRange:(NSRange){ 0, 0 }]];

	//An entity name ends at its ';'. Stopping at '&' too keeps one stray ampersand from swallowing the next reference.
	NSCharacterSet *entityNameEnd = [NSCharacterSet characterSetWithCharactersInString:@";&"];

	while (YES) { //Actual condition is below.
		NSString *chunk = nil;
		if ([scanner scanUpToString:@"&" intoString:&chunk]) {
			[result appendString:chunk];
		}
		BOOL sawAmpersand = [scanner scanString:@"&" intoString:NULL];

		//Condition is here.
		if ([scanner scanLocation] >= len) {
			//An ampersand that ends the string is not a reference; keep it
			if (sawAmpersand) [result appendString:@"&"];
			break;
		}
		if (!sawAmpersand) break; //Should not happen; guards against looping without progress

		if ([scanner scanString:@"#" intoString:NULL]) {
			NSString *hexIdentifier = nil;
			if ([scanner scanString:@"x" intoString:&hexIdentifier] || [scanner scanString:@"X" intoString:&hexIdentifier]) {
				//Ready for hex.
				unsigned unichar32 = 0xffff;
				if (![scanner scanHexInt:&unichar32]) {
					[result appendFormat:@"&#%@", hexIdentifier];
				} else if (![scanner scanString:@";" intoString:NULL]) {
					//No semicolon, so not a reference. Put the number back in hex, since it follows the 'x'.
					[result appendFormat:@"&#%@%x", hexIdentifier, unichar32];
				} else {
					AIAppendUnicodeScalarValue(result, unichar32);
				}
			} else {
				//Not hex. Hopefully decimal.
				int unichar32 = 65535; //== 0xffff
				if (![scanner scanInt:&unichar32]) {
					[result appendString:@"&#"];
				} else if (![scanner scanString:@";" intoString:NULL]) {
					[result appendFormat:@"&#%i", unichar32];
				} else {
					//-scanInt: accepts a minus sign; a negative value is not a code point
					AIAppendUnicodeScalarValue(result, ((unichar32 < 0) ? 0xFFFD : (UTF32Char)unichar32));
				}
			}
		} else {
			//Not a numeric entity. Should be a named entity.
			NSString *entityName = nil;
			if (![scanner scanUpToCharactersFromSet:entityNameEnd intoString:&entityName]) {
				[result appendString:@"&"];
			} else {
				//Strip the semicolon.
				BOOL terminated = [scanner scanString:@";" intoString:NULL];
				NSString *entity = [entities objectForKey:entityName];
				if (entity) {
					[result appendString:entity];

				} else {
					NSLog(@"-[NSString(AIStringAdditions) stringByUnescapingFromXMLWithEntities]: Named entity %@ unknown.", entityName);
					//Keep the text as it was written instead of losing it
					[result appendFormat:@"&%@%@", entityName, (terminated ? @";" : @"")];
				}
			}
		}
	}

	return [NSString stringWithString:result];
}

@end
/*!
 * @brief Convert a Unicode scalar value to UTF-16 code units
 *
 * The code units are built with shifts and masks. The layout of bit-fields within a storage unit is
 * implementation-defined, so overlaying bit-field structs on the values gives different answers on
 * different ABIs; arithmetic does not.
 *
 * @param scalar The UTF-32 value to convert.
 * @param outHigh If non-NULL, receives the high surrogate, or 0 when no surrogate pair is needed.
 * @param outLow If non-NULL, receives the low surrogate, or the single code unit when no pair is needed.
 * @result YES if a surrogate pair was produced; NO if \a scalar fits in one code unit.
 *         Values above U+10FFFF cannot be encoded; NO is returned and outLow receives U+FFFD.
 */
static BOOL getSurrogatesForUnicodeScalarValue(const UTF32Char scalar, unichar *outHigh, unichar *outLow) {
	if(scalar <= 0xffff) {
		if(outHigh) *outHigh = 0x0000;
		if(outLow)  *outLow  = (unichar)scalar;
		return NO;
	}

	if(scalar > 0x10ffff) {
		if(outHigh) *outHigh = 0x0000;
		if(outLow)  *outLow  = 0xfffd;
		return NO;
	}

	//note: names uuuuu, wwww, and xxxxx+ are taken from the Unicode book (section 3.9, table 3-4).
	//scalar value:   000uuuuu xxxxxx xxxxxxxxxx
	//high surrogate: 110110 wwww xxxxxx          (wwww = uuuuu - 1)
	//low surrogate:  110111 xxxxxxxxxx
	const UTF32Char uuuuu      = (scalar >> 16) & 0x1f;
	const UTF32Char xxxxxx     = (scalar >> 10) & 0x3f;
	const UTF32Char xxxxxxxxxx =  scalar        & 0x3ff;
	const UTF32Char wwww       = uuuuu - 1; //uuuuu is 1..16 here because 0xffff < scalar <= 0x10ffff

	if(outHigh) {
		const UTF32Char highPrefix = 0x36; //0b110110
		*outHigh = (unichar)((highPrefix << 10) | (wwww << 6) | xxxxxx);
	}

	if(outLow) {
		const UTF32Char lowPrefix = 0x37; //0b110111
		*outLow = (unichar)((lowPrefix << 10) | xxxxxxxxxx);
	}

	return YES;
}